from sklearn.datasets import load_breast_cancer
import numpy as np
import pandas as pd

# 1. Load the dataset.
# The Bunch returned by load_breast_cancer() already carries the feature
# matrix and the label vector, so X and y are taken from it instead of calling
# the loader a second time with return_X_y=True (which would read and parse
# the same data again).
# NOTE: X and y now reference the same arrays as breast_cancer['data'] and
# breast_cancer['target']; copy them first if they need to be modified
# independently of the Bunch.
breast_cancer = load_breast_cancer()
X, y = breast_cancer['data'], breast_cancer['target']
# print(breast_cancer)
# Show which fields the dataset object provides.
print(breast_cancer.keys())
# print(breast_cancer.values())
# # 2. Initial data inspection
#     # 2.1 Inspect the feature matrix
#     #     2.1.1 Shape
# print(breast_cancer['data'])
# print(np.shape(breast_cancer['data']))
#     #     2.1.2 Which features are present
# print(breast_cancer['feature_names'])
#     # 2.2 Inspect the labels
#     #     2.2.1 Label values
# print(breast_cancer['target'])
#     #     2.2.2 Label shape
# print(np.shape(breast_cancer['target']))
#     #     2.2.3 Meaning of each label value
# print(breast_cancer['target_names'])
# 3. Statistical characteristics of the data
# cancers = pd.DataFrame(data=breast_cancer['data'],columns = breast_cancer['feature_names'])
# describe = cancers.describe()
# describe.to_excel('statistic.xlsx',sheet_name='statistic')
# target = breast_cancer['target']
# features = breast_cancer['data']
# print(features.mean(axis = 0))          # mean
# print(features.var(axis = 0))          # variance
# print(features.std(axis = 0))          # standard deviation
# print(features.max(axis = 0))          # maximum
# print(features.min(axis = 0))          # minimum
# cancers.to_excel('output.xlsx',sheet_name='ghn')